from pyspark.context import SparkContext

# NOTE(review): appName says "word_count" but the job counts students per
# class — consider renaming; kept as-is since it is a runtime string.
sc = SparkContext(master="local", appName="word_count")

# Load student records (one CSV-style line per student) into 3 partitions.
students_rdd = sc.textFile("../../data/students.txt", 3)

# Map each record to (class_name, 1); the last comma-separated field is
# presumably the class name — TODO confirm against the data file's schema.
kv_rdd = students_rdd.map(lambda line: (line.split(',')[-1], 1))

# Sum the 1s per class. reduceByKey is the idiomatic, equivalent form of
# aggregateByKey(0, lambda x, y: x + y, lambda a, b: a + b): same zero
# value and identical seq/comb functions.
num_rdd = kv_rdd.reduceByKey(lambda a, b: a + b)

# Collect to the driver before printing: rdd.foreach(print) executes on the
# executors and only happens to show output in local mode.
for pair in num_rdd.collect():
    print(pair)

# Release the SparkContext's resources when done.
sc.stop()